To quantitatively examine the efficacy of vegetation restoration in drylands globally.
Study-level viz to document patterns in exclusions primarily and the relatie frequenices, at the study level, of major categories of evidence.
#study data####
library(tidyverse)
studies <- read_csv("data/studies.csv")
studies
## # A tibble: 278 x 18
## ID title technique data region exclude rationale observations
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 152 Shor… seeding,… expe… Africa no <NA> <NA>
## 2 180 Rest… chemical… App.… Africa no <NA> <NA>
## 3 229 Infl… soil see… fiel… Africa no <NA> <NA>
## 4 230 Acti… planting fiel… Africa no <NA> <NA>
## 5 255 The … grazing … fiel… Africa no <NA> <NA>
## 6 262 Reve… seeding,… eper… Africa no <NA> <NA>
## 7 263 The … phytogen… fiel… Africa no <NA> <NA>
## 8 264 Eval… seeding,… fiel… Africa no <NA> <NA>
## 9 271 Patc… natural … fiel… Africa no <NA> <NA>
## 10 4 Fact… natural … App.… Africa no <NA> <NA>
## # ... with 268 more rows, and 10 more variables: disturbance <chr>,
## # system <chr>, goal <chr>, intervention <chr>, paradigm <chr>,
## # grazing <chr>, hypothesis <chr>, soil <chr>, benchmark <chr>,
## # notes <chr>
#quick look at rationale needed
exclusions <- studies %>%
filter(exclude == "yes")
#quick look at studies with paradigms
evidence <- studies %>%
filter(exclude == "no")
#library(skimr)
#skim(evidence)
#study-level viz#####
#exclusions
#ggplot(exclusions, aes(rationale, fill = region)) +
# geom_bar() +
# coord_flip() +
# labs(x = "rational for exclusion", y = "frequency") +
# scale_fill_brewer(palette = "Paired")
ggplot(evidence, aes(disturbance, fill = paradigm)) +
geom_bar(na.rm = TRUE) +
coord_flip() +
scale_fill_brewer(palette = "Paired") +
labs(y = "frequency")
#ggplot(evidence, aes(region, fill = paradigm)) +
# geom_bar(na.rm = TRUE) +
# coord_flip() +
# scale_fill_brewer(palette = "Paired") +
# labs(y = "frequency")
#ggplot(evidence, aes(data, fill = paradigm)) +
# geom_bar(na.rm = TRUE) +
#coord_flip() +
#scale_fill_brewer(palette = "Paired") +
#labs(y = "frequency")
#ggplot(evidence, aes(system, fill = paradigm)) +
# geom_bar(na.rm = TRUE) +
# coord_flip() +
# scale_fill_brewer(palette = "Paired") +
# labs(y = "frequency")
#ggplot(evidence, aes(goal, fill = paradigm)) +
#geom_bar(na.rm = TRUE) +
#coord_flip() +
#scale_fill_brewer(palette = "Paired") +
#labs(x = "outcome", y = "frequency")
#step 1 models####
#paradigm
derived.evidence <- evidence %>%
group_by(technique, data, region, disturbance, goal, paradigm) %>% summarise(n = n())
#active-passive split
m <- glm(n~paradigm, family = poisson, derived.evidence)
anova(m, test="Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: n
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 167 9.9147
## paradigm 1 0.045115 166 9.8696 0.8318
#region
m1 <- glm(n~paradigm*region, family = poisson, derived.evidence)
#m1
#summary(m1)
anova(m1, test="Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: n
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 167 9.9147
## paradigm 1 0.045115 166 9.8696 0.8318
## region 6 0.301367 160 9.5682 0.9995
## paradigm:region 6 0.213627 154 9.3546 0.9998
#outcome
m2 <- glm(n~paradigm*goal, family = poisson, derived.evidence)
#m1
#summary(m1)
anova(m2, test="Chisq")
## Analysis of Deviance Table
##
## Model: poisson, link: log
##
## Response: n
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 167 9.9147
## paradigm 1 0.045115 166 9.8696 0.8318
## goal 6 0.240941 160 9.6287 0.9997
## paradigm:goal 4 0.301480 156 9.3272 0.9897
#even split between active and passive evidence by all key categories
A summary of sort process using PRISMA.
library(PRISMAstatement)
prisma(found = 1504,
found_other = 5,
no_dupes = 1039,
screened = 1039,
screen_exclusions = 861,
full_text = 178,
full_text_exclusions = 101,
qualitative = 77,
quantitative = 66,
width = 800, height = 800)
Check data and calculate necessary measures.
#all data
#data <- read_csv("data/data.csv")
#data <- data %>%
#mutate(lrr = log(mean.t/mean.c), rii = ((mean.t-mean.c)/(mean.t + mean.c)), var.es = ((sd.t^2/n.t*mean.t^2) + (sd.c^2/n.c*mean.c^2)))
#other effect size estimates
#library(compute.es)
#data <- data %>%
#mutate(d=mes(mean.t, mean.c, sd.t, sd.c, n.t, n.c, level = 95, #cer = 0.2, dig = 2, , id = ID, data = data))
#check metafor
#data from ag and grazing studies that examined restoration in drylands
paperdata <- read_csv("data/paper_data.csv")
paperdata <- paperdata %>%
mutate(lrr = log(mean.t/mean.c), rii = ((mean.t-mean.c)/(mean.t + mean.c)), var.es = ((sd.t^2/n.t*mean.t^2) + (sd.c^2/n.c*mean.c^2))) #use only lrr now
Explore summary level data of all data. Explore aggregation levels that support the most reasonable data structure and minimize non-independence issues.
#evidence map####
require(maps)
world<-map_data("world")
map<-ggplot() + geom_polygon(data=world, fill="gray50", aes(x=long, y=lat, group=group))
#map + geom_point(data=paperdata, aes(x=long, y=lat)) +
#labs(x = "longitude", y = "latitude") #render a literal map, i.e. evidence map, of where we study the niche in deserts globally
#add in levels and color code points on map####
map + geom_point(data=paperdata, aes(x=long, y=lat, color = paradigm)) +
scale_color_brewer(palette = "Paired") +
labs(x = "longitude", y = "latitude", color = "")
#aggregation####
se <- function(x){
sd(x)/sqrt(length(x))
}
data.simple <- paperdata %>%
group_by(study.ID, paradigm, technique, measure.success) %>%
summarise(n = n(), mean.lrr = mean(lrr), mean.rii = mean(rii), mean.var = mean(var.es))
main.data <- paperdata %>%
group_by(study.ID, paradigm, intervention, outcome) %>%
summarise(n = n(), mean.lrr = mean(lrr), mean.rii = mean(rii), mean.var = mean(var.es))
#EDA data####
simple.data <- paperdata %>% group_by(study.ID, paradigm, technique, measure.success) %>% summarise(mean.rii = mean(rii), error = se(rii))
simple.data <- na.omit(simple.data)
parad.data <- paperdata %>% group_by(study.ID, paradigm) %>% summarise(mean.rii = mean(rii), error = se(rii))
parad.data <- na.omit(parad.data)
tech.data <- paperdata %>% group_by(study.ID, technique) %>% summarise(mean.rii = mean(rii), error = se(rii))
tech.data <- na.omit(tech.data)
tech.data
## # A tibble: 31 x 4
## # Groups: study.ID [31]
## study.ID technique mean.rii error
## <dbl> <chr> <dbl> <dbl>
## 1 13 planting -0.00830 0.0423
## 2 51 planting, grazing exclusion 0.128 0.0982
## 3 66 seeding, shrub facilitation 0.0762 0.0275
## 4 68 planting -0.177 0.0548
## 5 69 grazing exclusion 0.502 0.0749
## 6 77 mowing, grazing 0.0613 0.472
## 7 87 fertilization, biostimulants, seeding -0.0138 0.0137
## 8 88 seeding -0.130 0.0153
## 9 95 seeding, planting -0.228 0.0415
## 10 104 mycorrhizal 0.307 0.0230
## # ... with 21 more rows
success.data <- paperdata %>% group_by(study.ID, measure.success) %>% summarise(mean.rii = mean(rii), error = se(rii))
success.data <- na.omit(success.data)
#active
active <- paperdata %>%
filter(paradigm == "active")
#viz for aggregation####
disturbance.data <- paperdata %>%
group_by(study.ID,disturbance, paradigm) %>%
count()
disturbance.data2 <- disturbance.data %>%
group_by(disturbance,paradigm) %>%
count()
ggplot(na.omit(disturbance.data2), aes(disturbance,nn, fill=paradigm))+
geom_bar(stat = "identity") +
coord_flip(ylim=0:44) +
scale_fill_brewer(palette = "Blues")
intervention.data <- active %>%
group_by(study.ID,intervention, paradigm) %>%
count()
intervention.data2 <- intervention.data %>%
group_by(intervention,paradigm) %>%
count()
#ggplot(na.omit(intervention.data2), aes(intervention,nn, fill=paradigm)) +
#geom_bar(stat = "identity") +
#coord_flip(ylim=0:44) +
#scale_fill_brewer(palette = "Blues")
technique.data <- paperdata %>%
group_by(study.ID,technique, paradigm) %>%
count()
technique.data2 <- technique.data %>%
group_by(technique,paradigm) %>%
count()
technique.data3 <- technique.data2 %>%
group_by(technique) %>%
count()
ggplot(na.omit(data.simple), aes(technique, n, fill = paradigm)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_brewer(palette = "Paired")
ggplot(na.omit(data.simple), aes(measure.success, n, fill = paradigm)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_brewer(palette = "Paired")
#better
ggplot(main.data, aes(intervention, n, fill = paradigm)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_brewer(palette = "Paired") +
labs(fill = "")
ggplot(main.data, aes(outcome, n, fill = paradigm)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_brewer(palette = "Paired") +
labs(fill = "")
Exploratory data analyses to understand data and QA/QC using Rii.
Meta and conventional statistical models to explore relative efficacy.
Step 5. Synthesis stats
#p-value meta
#library(metap)
#all data for metas but cleaned for na's
#all data for meta cleaned
mdata.all <- paperdata %>%
filter(!is.na(lrr)) %>%
filter(!is.na(var.es)) %>%
filter(!is.na(n.t)) %>%
filter(!is.na(p)) %>%
filter(!is.na(intervention)) %>%
filter(is.finite(lrr))
#active only data for meta cleaned up
mdata <- paperdata %>%
filter(paradigm == "active") %>%
filter(!is.na(lrr)) %>%
filter(!is.na(var.es)) %>%
filter(!is.na(n.t)) %>%
filter(!is.na(p)) %>%
filter(!is.na(intervention)) %>%
filter(is.finite(lrr))
#aggregated data for metas var estimated with central tendency
#note - could also bootstrap mean variance here instead of arithematic mean
simple.mdata <- mdata %>%
group_by(intervention) %>%
summarise(lrr = mean(lrr), var.es = mean(var.es), n = mean(n.t))
simple.mdata.2 <- mdata %>%
group_by(intervention, outcome) %>%
summarise(lrr = mean(lrr), var.es = mean(var.es), n = mean(n.t))
simple.mdata.var <- mdata %>%
group_by(intervention) %>%
summarise(lrr = mean(lrr), var.es = se(var.es), n = mean(n.t))
simple.mdata2.var <- mdata %>%
group_by(intervention, outcome) %>%
summarise(lrr = mean(lrr), var.es = se(var.es), n = mean(n.t))
#metas with p-values####
#schweder(mdata$p)
#sumz(p, data = mdata)
#mdata %>%
#split(.$intervention) %>%
#purrr::map(~sumz(.$p))
#sumlog(mdata$p)
#metas with meta package on effect size measures####
library(meta)
#all data non-aggregated
#m <- metagen(lrr, var.es, studlab = ID, byvar = intervention, data = mdata)
#summary(m)
#funnel(m)
#metabias(m)
#forest(m, sortvar = intervention)
m <- metagen(lrr, var.es, studlab = ID, byvar = intervention, subset = paradigm == "active", data = mdata.all)
summary(m)
## Number of studies combined: k = 665
##
## 95%-CI z p-value
## Fixed effect model -0.0063 [-0.0063; -0.0063] -40590292.09 0
## Random effects model 0.2266 [ 0.1715; 0.2816] 8.07 < 0.0001
##
## Quantifying heterogeneity:
## tau^2 = 0.1848; H = 21775022.93 [21775022.02; 21775023.84]; I^2 = 100.0% [100.0%; 100.0%]
##
## Test of heterogeneity:
## Q d.f. p-value
## 314836678046932224.00 664 0
##
## Results for subgroups (fixed effect model):
## k 95%-CI
## intervention = vegetation 442 -0.0063 [-0.0063; -0.0063]
## intervention = soil 148 0.7206 [ 0.7204; 0.7209]
## intervention = water addition 75 0.0037 [ 0.0025; 0.0049]
## Q tau^2 I^2
## intervention = vegetation 314836678003004992.00 0.1848 100.0%
## intervention = soil 2625290.13 0.0583 100.0%
## intervention = water addition 109.93 <0.0001 32.7%
##
## Test for subgroup differences (fixed effect model):
## Q d.f. p-value
## Between groups 41301796.69 2 0
## Within groups 314836678005630400.00 662 0
##
## Results for subgroups (random effects model):
## k 95%-CI
## intervention = vegetation 442 0.2160 [ 0.1549; 0.2771]
## intervention = soil 148 0.2970 [ 0.2183; 0.3756]
## intervention = water addition 75 0.0073 [-0.0012; 0.0158]
## Q tau^2 I^2
## intervention = vegetation 314836678003004992.00 0.1848 100.0%
## intervention = soil 2625290.13 0.0583 100.0%
## intervention = water addition 109.93 <0.0001 32.7%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 94.11 2 < 0.0001
##
## Details on meta-analytical method:
## - Inverse variance method
## - DerSimonian-Laird estimator for tau^2
m <- metagen(lrr, var.es, studlab = ID, byvar = intervention, subset = paradigm == "passive", data = mdata.all)
summary(m)
## Number of studies combined: k = 330
##
## 95%-CI z p-value
## Fixed effect model -0.0463 [-0.0463; -0.0463] -1181815.78 0
## Random effects model -0.3632 [-0.4637; -0.2628] -7.09 < 0.0001
##
## Quantifying heterogeneity:
## tau^2 = 0.4167; H = 864963.56 [864962.52; 864964.60]; I^2 = 100.0% [100.0%; 100.0%]
##
## Test of heterogeneity:
## Q d.f. p-value
## 246145284902920.38 329 0
##
## Results for subgroups (fixed effect model):
## k 95%-CI
## intervention = vegetation 108 0.4187 [ 0.4187; 0.4187]
## intervention = grazing exclusion 25 0.3958 [ 0.3958; 0.3959]
## intervention = soil 197 -0.6888 [-0.6888; -0.6888]
## Q tau^2 I^2
## intervention = vegetation 28560273455660.51 0.0968 100.0%
## intervention = grazing exclusion 16640578.62 0.0096 100.0%
## intervention = soil 23314502087392.00 0.0951 100.0%
##
## Test for subgroup differences (fixed effect model):
## Q d.f. p-value
## Between groups 194270492719289.25 2 0
## Within groups 51874792183631.12 327 0
##
## Results for subgroups (random effects model):
## k 95%-CI
## intervention = vegetation 108 0.1542 [ 0.0294; 0.2789]
## intervention = grazing exclusion 25 0.2869 [ 0.2260; 0.3478]
## intervention = soil 197 -0.4834 [-0.5410; -0.4259]
## Q tau^2 I^2
## intervention = vegetation 28560273455660.51 0.0968 100.0%
## intervention = grazing exclusion 16640578.62 0.0096 100.0%
## intervention = soil 23314502087392.00 0.0951 100.0%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 341.41 2 < 0.0001
##
## Details on meta-analytical method:
## - Inverse variance method
## - DerSimonian-Laird estimator for tau^2
m <- metagen(lrr, var.es, studlab = ID, byvar = paradigm, data = mdata.all)
summary(m)
## Number of studies combined: k = 995
##
## 95%-CI z p-value
## Fixed effect model -0.0063 [-0.0063; -0.0063] -40594636.11 0
## Random effects model 0.0036 [-0.0393; 0.0465] 0.17 0.8686
##
## Quantifying heterogeneity:
## tau^2 = 0.1848; H = 17804094.53 [17804093.80; 17804095.26]; I^2 = 100.0% [100.0%; 100.0%]
##
## Test of heterogeneity:
## Q d.f. p-value
## 315083867351259776.00 994 0
##
## Results for subgroups (fixed effect model):
## k 95%-CI Q
## paradigm = active 665 -0.0063 [-0.0063; -0.0063] 314836678046932224.00
## paradigm = passive 330 -0.0463 [-0.0463; -0.0463] 246145284902920.38
## tau^2 I^2
## paradigm = active 0.1848 100.0%
## paradigm = passive 0.4167 100.0%
##
## Test for subgroup differences (fixed effect model):
## Q d.f. p-value
## Between groups 1044019424645.23 1 0
## Within groups 315082823331835136.00 993 0
##
## Results for subgroups (random effects model):
## k 95%-CI Q
## paradigm = active 665 0.2266 [ 0.1715; 0.2816] 314836678046932224.00
## paradigm = passive 330 -0.3632 [-0.4637; -0.2628] 246145284902920.38
## tau^2 I^2
## paradigm = active 0.1848 100.0%
## paradigm = passive 0.4167 100.0%
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 101.85 1 < 0.0001
##
## Details on meta-analytical method:
## - Inverse variance method
## - DerSimonian-Laird estimator for tau^2
#m.study <- metagen(lrr, var.es, studlab = study.ID, byvar = intervention, data = mdata)
#summary(m.study)
#funnel(m)
#metabias(m)
#forest(m, sortvar = intervention)
#aggregated data
#m1 <- metagen(lrr, var.es, studlab = intervention, data = simple.mdata)
#summary(m1)
#funnel(m1)
#metabias(m1)
#forest(m, sortvar = intervention)
#different var estimate
#m2 <- metagen(lrr, var.es, studlab = intervention, data = simple.mdata.var)
#summary(m2)
#funnel(m2)
#metabias(m2)
#forest(m, sortvar = intervention)
#m3 <- metagen(lrr, var.es, studlab = intervention, byvar = outcome, data = simple.mdata.2)
#summary(m3)
#funnel(m)
#radial(m3)
#metabias(m2)
#forest(m, sortvar = intervention)
#m4 <- metagen(lrr, var.es, studlab = intervention, byvar = outcome, data = simple.mdata2.var)
#summary(m4)
#funnel(m)
#radial(m4)
#metabias(m2)
#forest(m, sortvar = intervention)
#forest(m1, sortvar = intervention, fontsize = 10, xlim = c(-2, 2))
#forest(m2, sortvar = intervention, fontsize = 10, xlim = c(-2, 2))
#forest(m3, sortvar = intervention, fontsize = 10, xlim = c(-2, 2))
#forest(m4, sortvar = intervention, fontsize = 10, xlim = c(-2, 2))
#t-tests for lrr diff from 0
#mdata %>%
#split(.$intervention) %>%
#purrr::map(~sumz(.$lrr))
#effect sizes plots
#need better ci estimates
#ggplot(simple.mdata, aes(intervention, lrr)) +
# ylim(c(-2,2)) +
# geom_point(position = position_dodge(width = 0.5)) +
# labs(x = "", y = "lrr") +
# coord_flip() +
# geom_errorbar(aes(ymin=lrr-var.es, ymax=lrr+var.es), width=.05, position = position_dodge(width = 0.5)) +
# geom_hline(yintercept = 0, colour="grey", linetype = "longdash")
#ggplot(simple.mdata.2, aes(intervention, lrr, color = outcome)) +
# ylim(c(-2,2)) +
#geom_point(position = position_dodge(width = 0.5)) +
# labs(x = "", y = "lrr", color = "") +
# coord_flip() +
#geom_errorbar(aes(ymin=lrr-var.es, ymax=lrr+var.es), width=.05, position = #position_dodge(width = 0.5)) +
#geom_hline(yintercept = 0, colour="grey", linetype = "longdash")
#ggplot(mdata, aes(lrr, color = intervention)) +
#geom_freqpoly(binwidth = 0.5, size = 2) +
#xlim(c(-10, 10)) +
#labs(x = "lrr", y = "frequency", color = "") +
#geom_vline(xintercept = 0, colour="grey", linetype = "longdash") +
#scale_color_brewer()
#ggplot(mdata, aes(lrr, fill = intervention)) +
#geom_dotplot(binwidth = 1) +
# xlim(c(-10, 10)) +
# labs(x = "lrr", y = "frequency", fill = "") +
# geom_vline(xintercept = 0, colour="grey", linetype = "longdash") +
# scale_fill_brewer()